Libraries

library(ggplot2)
library(dplyr)
library(png)
library(grid)
library(plotly)
library(gganimate)
library(tidyr)
library(forecast)

Load data

load_csv <- function(csv_name, data_folder = 'data') {
  df <- read.csv(file.path(data_folder, csv_name))
  df
}

inventories_df <- load_csv('inventories.csv')
inventory_sets_df <- load_csv('inventory_sets.csv')
sets_df <- load_csv('sets.csv')
themes_df <- load_csv('themes.csv')
inventory_minifigs_df <- load_csv('inventory_minifigs.csv')
minifigs_df <- load_csv('minifigs.csv')
inventory_parts_df <- load_csv('inventory_parts.csv')
colors_df <- load_csv('colors.csv')
parts_df <- load_csv('parts.csv')
elements_df <- load_csv('elements.csv')
part_categories_df <- load_csv('part_categories.csv')
part_relationships_df <- load_csv('part_relationships.csv')

Basic tables analysis

Data schema

Tables

Inventories

id version set_num
1 1 7922-1
3 1 3931-1
4 1 6942-1
15 1 5158-1
16 1 903-1
17 1 850950-1
##        id            version         set_num         
##  Min.   :     1   Min.   : 1.000   Length:37265      
##  1st Qu.: 14424   1st Qu.: 1.000   Class :character  
##  Median : 54379   Median : 1.000   Mode  :character  
##  Mean   : 61104   Mean   : 1.091                     
##  3rd Qu.: 88842   3rd Qu.: 1.000                     
##  Max.   :194312   Max.   :16.000

Inventory sets

inventory_id set_num quantity
35 75911-1 1
35 75912-1 1
39 75048-1 1
39 75053-1 1
50 4515-1 1
50 4520-1 2
##        id            version         set_num         
##  Min.   :     1   Min.   : 1.000   Length:37265      
##  1st Qu.: 14424   1st Qu.: 1.000   Class :character  
##  Median : 54379   Median : 1.000   Mode  :character  
##  Mean   : 61104   Mean   : 1.091                     
##  3rd Qu.: 88842   3rd Qu.: 1.000                     
##  Max.   :194312   Max.   :16.000

Sets

set_num name year theme_id num_parts img_url
001-1 Gears 1965 1 43 https://cdn.rebrickable.com/media/sets/001-1.jpg
0011-2 Town Mini-Figures 1979 67 12 https://cdn.rebrickable.com/media/sets/0011-2.jpg
0011-3 Castle 2 for 1 Bonus Offer 1987 199 0 https://cdn.rebrickable.com/media/sets/0011-3.jpg
0012-1 Space Mini-Figures 1979 143 12 https://cdn.rebrickable.com/media/sets/0012-1.jpg
0013-1 Space Mini-Figures 1979 143 12 https://cdn.rebrickable.com/media/sets/0013-1.jpg
0014-1 Space Mini-Figures 1979 143 2 https://cdn.rebrickable.com/media/sets/0014-1.jpg
##    set_num              name                year         theme_id  
##  Length:21880       Length:21880       Min.   :1949   Min.   :  1  
##  Class :character   Class :character   1st Qu.:2001   1st Qu.:273  
##  Mode  :character   Mode  :character   Median :2012   Median :497  
##                                        Mean   :2008   Mean   :442  
##                                        3rd Qu.:2018   3rd Qu.:608  
##                                        Max.   :2024   Max.   :752  
##    num_parts         img_url         
##  Min.   :    0.0   Length:21880      
##  1st Qu.:    3.0   Class :character  
##  Median :   31.0   Mode  :character  
##  Mean   :  161.4                     
##  3rd Qu.:  139.0                     
##  Max.   :11695.0

Themes

id name parent_id
1 Technic NA
3 Competition 1
4 Expert Builder 1
16 RoboRiders 1
17 Speed Slammers 1
18 Star Wars 1
##        id            name             parent_id    
##  Min.   :  1.0   Length:468         Min.   :  1.0  
##  1st Qu.:250.5   Class :character   1st Qu.:186.0  
##  Median :466.0   Mode  :character   Median :411.0  
##  Mean   :433.5                      Mean   :360.6  
##  3rd Qu.:625.2                      3rd Qu.:512.5  
##  Max.   :752.0                      Max.   :697.0  
##                                     NA's   :145

Inventory Minifigs

inventory_id fig_num quantity
3 fig-001549 1
4 fig-000764 1
19 fig-000555 1
25 fig-000574 1
26 fig-000842 1
26 fig-008641 1
##   inventory_id      fig_num             quantity      
##  Min.   :     3   Length:20858       Min.   :  1.000  
##  1st Qu.:  7869   Class :character   1st Qu.:  1.000  
##  Median : 15681   Mode  :character   Median :  1.000  
##  Mean   : 43010                      Mean   :  1.062  
##  3rd Qu.: 66834                      3rd Qu.:  1.000  
##  Max.   :194312                      Max.   :100.000

Minifigs

fig_num name num_parts img_url
fig-000001 Toy Store Employee 4 https://cdn.rebrickable.com/media/sets/fig-000001.jpg
fig-000002 Customer Kid 4 https://cdn.rebrickable.com/media/sets/fig-000002.jpg
fig-000003 Assassin Droid, White 8 https://cdn.rebrickable.com/media/sets/fig-000003.jpg
fig-000004 Man, White Torso, Black Legs, Brown Hair 4 https://cdn.rebrickable.com/media/sets/fig-000004.jpg
fig-000005 Captain America with Short Legs 3 https://cdn.rebrickable.com/media/sets/fig-000005.jpg
fig-000006 Lloyd Avatar 5 https://cdn.rebrickable.com/media/sets/fig-000006.jpg
##    fig_num              name             num_parts         img_url         
##  Length:13764       Length:13764       Min.   :  0.000   Length:13764      
##  Class :character   Class :character   1st Qu.:  4.000   Class :character  
##  Mode  :character   Mode  :character   Median :  4.000   Mode  :character  
##                                        Mean   :  5.296                     
##                                        3rd Qu.:  5.000                     
##                                        Max.   :156.000

Inventory parts

inventory_id part_num color_id quantity is_spare img_url
1 48379c01 72 1 f https://cdn.rebrickable.com/media/parts/photos/1/48379c01-1-e7daa845-2671-4737-8642-3b1574308155.jpg
1 48395 7 1 f https://cdn.rebrickable.com/media/parts/photos/7/48395-7-b9152acf-2fa5-4836-a04d-5b7fd39c2406.jpg
1 stickerupn0077 9999 1 f
1 upn0342 0 1 f
1 upn0350 25 1 f
3 2343 47 1 f https://cdn.rebrickable.com/media/parts/elements/3000240.jpg
##   inventory_id      part_num            color_id         quantity      
##  Min.   :     1   Length:1180987     Min.   :  -1.0   Min.   :   1.00  
##  1st Qu.:  9404   Class :character   1st Qu.:   4.0   1st Qu.:   1.00  
##  Median : 22838   Mode  :character   Median :  15.0   Median :   2.00  
##  Mean   : 50849                      Mean   : 131.8   Mean   :   3.37  
##  3rd Qu.: 87088                      3rd Qu.:  71.0   3rd Qu.:   4.00  
##  Max.   :194312                      Max.   :9999.0   Max.   :3064.00  
##    is_spare           img_url         
##  Length:1180987     Length:1180987    
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

Colors

id name rgb is_trans
-1 [Unknown] 0033B2 f
0 Black 05131D f
1 Blue 0055BF f
2 Green 237841 f
3 Dark Turquoise 008F9B f
4 Red C91A09 f
##        id             name               rgb              is_trans        
##  Min.   :  -1.0   Length:263         Length:263         Length:263        
##  1st Qu.:  83.0   Class :character   Class :character   Class :character  
##  Median :1005.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 651.4                                                           
##  3rd Qu.:1070.5                                                           
##  Max.   :9999.0

Parts

part_num name part_cat_id part_material
003381 Sticker Sheet for Set 663-1 58 Plastic
003383 Sticker Sheet for Sets 618-1, 628-2 58 Plastic
003402 Sticker Sheet for Sets 310-3, 311-1, 312-3 58 Plastic
003429 Sticker Sheet for Set 1550-1 58 Plastic
003432 Sticker Sheet for Sets 357-1, 355-1, 940-1 58 Plastic
003434 Sticker Sheet for Set 575-2, 653-1, 460-1 58 Plastic
##    part_num             name            part_cat_id    part_material     
##  Length:52615       Length:52615       Min.   : 1.00   Length:52615      
##  Class :character   Class :character   1st Qu.:17.00   Class :character  
##  Mode  :character   Mode  :character   Median :41.00   Mode  :character  
##                                        Mean   :38.91                     
##                                        3rd Qu.:60.00                     
##                                        Max.   :68.00

Elements

element_id part_num color_id design_id
6443403 2277c01pr0009 1 2277
6300211 67906c01 14 67908
4566309 2564 0 2564
4275423 53657 1004 53657
6194308 92926 71 28967
6229123 26561 4 26561
##    element_id         part_num            color_id        design_id     
##  Min.   :    9327   Length:84138       Min.   :  -1.0   Min.   :  1001  
##  1st Qu.: 4259774   Class :character   1st Qu.:   8.0   1st Qu.: 18454  
##  Median : 6057754   Mode  :character   Median :  28.0   Median : 41748  
##  Mean   : 5222065                      Mean   : 539.7   Mean   : 45570  
##  3rd Qu.: 6262024                      3rd Qu.: 135.0   3rd Qu.: 75475  
##  Max.   :61532443                      Max.   :9999.0   Max.   :107520  
##                                                         NA's   :23682

Part Relationships

rel_type child_part_num parent_part_num
P 3626cpr3662 3626c
P 87079pr9974 87079
P 3960pr9971 3960
R 98653pr0003 98086pr0003
R 98653pr0003 98088pat0003
R 98653pr0003 98089pat0003
##    rel_type         child_part_num     parent_part_num   
##  Length:29977       Length:29977       Length:29977      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character

Part categories

id name
1 Baseplates
3 Bricks Sloped
4 Duplo, Quatro and Primo
5 Bricks Special
6 Bricks Wedged
7 Containers
##        id            name          
##  Min.   : 1.00   Length:66         
##  1st Qu.:19.25   Class :character  
##  Median :35.50   Mode  :character  
##  Mean   :35.36                     
##  3rd Qu.:51.75                     
##  Max.   :68.00

Dataframes size

All rows in dataframes:  1446639 
All columns in dataframes:  45 
All values in dataframes:  8099232 

Analysis deep dive

Themes

Themes sum of rows with NA value

## [1] 145

All NA values are in parent_id column

##   id name parent_id
## 1  0    0       145

Create themes_parent_df

id name
1 1 Technic
10 22 Creator
12 34 Make & Create
14 50 Town
16 52 City
43 112 Racers

Join themes_parent_df with themes_df, insert name_child into name_parent if name_parent is NA

id name_child name_parent
1 Technic Technic
3 Competition Technic
4 Expert Builder Technic
16 RoboRiders Technic
17 Speed Slammers Technic
18 Star Wars Technic

Sets

Sets sum of rows with NA value

## [1] 0

Delete 2024 and 2023 from dataframe

Correlation between year and num_parts

Pearson correlation between year and num_parts

## [1] 0.09825731

Join sets with prepared themes

156 Parent themes with number of sets and sum of parts

name_parent count_sets sum_parts
4 Juniors 54 2317
Adventurers 83 8458
Agents 28 11970
Alpha Team 31 4313
Angry Birds 6 2388
Aquazone 37 6577

Top 10 themes based on number of sets

## # A tibble: 15 × 3
##    name_parent             count_sets sum_parts
##    <chr>                        <int>     <int>
##  1 Gear                          3144      8630
##  2 Duplo                         1269     44002
##  3 Star Wars                      902    324609
##  4 Books                          869      7466
##  5 City                           812    166527
##  6 Collectible Minifigures        803      5661
##  7 Technic                        786    255869
##  8 Town                           677     84587
##  9 Educational and Dacta          672    127642
## 10 Friends                        514    101468
## 11 Service Packs                  501      7060
## 12 Ninjago                        490    122719
## 13 Creator                        489    199572
## 14 System                         464     21618
## 15 Bionicle                       460     32668
## # A tibble: 428 × 4
##     year name_parent count_sets sum_parts
##    <int> <chr>            <int>     <int>
##  1  1949 System               5       498
##  2  1950 System               6         6
##  3  1953 System               4        50
##  4  1954 System              14       117
##  5  1955 Books                1         0
##  6  1955 System              35       439
##  7  1956 Books                1         0
##  8  1956 System              17       295
##  9  1957 System              20       783
## 10  1958 System              57       992
## # ℹ 418 more rows

Parts

Parts sum of rows with NA value

## [1] 0

### Part categories #### Part categories sum of rows with NA value

## [1] 0

Join parts with part_categories

part_num part_cat_id part_material name_categories
003381 58 Plastic Stickers
003383 58 Plastic Stickers
003402 58 Plastic Stickers
003429 58 Plastic Stickers
003432 58 Plastic Stickers
003434 58 Plastic Stickers
## # A tibble: 116 × 3
## # Groups:   name_categories [66]
##    name_categories              part_material   count
##    <chr>                        <chr>           <int>
##  1 Bars, Ladders and Fences     Plastic           127
##  2 Baseplates                   Cardboard/Paper    16
##  3 Baseplates                   Plastic           223
##  4 Belville, Scala and Fabuland Cloth             245
##  5 Belville, Scala and Fabuland Foam               17
##  6 Belville, Scala and Fabuland Plastic           431
##  7 Belville, Scala and Fabuland Rubber              1
##  8 Bricks                       Plastic          1555
##  9 Bricks Curved                Plastic           592
## 10 Bricks Round and Cones       Plastic           445
## # ℹ 106 more rows

Part relationships

Part rategories sum of rows with NA value

## [1] 23682

All NA values are in design_id column

##   element_id part_num color_id design_id
## 1          0        0        0     23682

I don’t know what design_id reffers to (there is no table with design_id key) so I’m going to remove design_id column

Join parts_with_categories with elements

part_num part_cat_id part_material name_categories element_id color_id
003381 58 Plastic Stickers NA NA
003383 58 Plastic Stickers NA NA
003402 58 Plastic Stickers NA NA
003429 58 Plastic Stickers NA NA
003432 58 Plastic Stickers NA NA
003434 58 Plastic Stickers NA NA

Parts without any elements

part_num part_cat_id part_material name_categories
003381 58 Plastic Stickers
003383 58 Plastic Stickers
003402 58 Plastic Stickers
003429 58 Plastic Stickers
003432 58 Plastic Stickers
003434 58 Plastic Stickers

Parts with any elements

part_num part_cat_id part_material name_categories color_id
68 01023 58 Plastic Stickers 9999
69 01041 58 Plastic Stickers 9999
70 01343 58 Plastic Stickers 9999
71 01500 58 Plastic Stickers 9999
72 01501 58 Plastic Stickers 9999
73 01503 58 Plastic Stickers 9999

Categories of parts without any elements

## # A tibble: 104 × 3
## # Groups:   name_categories [65]
##    name_categories              part_material   count
##    <chr>                        <chr>           <int>
##  1 Bars, Ladders and Fences     Plastic            23
##  2 Baseplates                   Cardboard/Paper     6
##  3 Baseplates                   Plastic           191
##  4 Belville, Scala and Fabuland Cloth             213
##  5 Belville, Scala and Fabuland Foam               17
##  6 Belville, Scala and Fabuland Plastic           270
##  7 Belville, Scala and Fabuland Rubber              1
##  8 Bricks                       Plastic          1026
##  9 Bricks Curved                Plastic           141
## 10 Bricks Round and Cones       Plastic            66
## # ℹ 94 more rows

Categories of parts with any elements

## # A tibble: 95 × 3
## # Groups:   name_categories [64]
##    name_categories              part_material   count
##    <chr>                        <chr>           <int>
##  1 Bars, Ladders and Fences     Plastic          1192
##  2 Baseplates                   Cardboard/Paper    10
##  3 Baseplates                   Plastic           182
##  4 Belville, Scala and Fabuland Cloth              34
##  5 Belville, Scala and Fabuland Plastic           578
##  6 Bricks                       Plastic          2551
##  7 Bricks Curved                Plastic          2768
##  8 Bricks Round and Cones       Plastic          1855
##  9 Bricks Sloped                Plastic          2402
## 10 Bricks Special               Plastic          1250
## # ℹ 85 more rows

Colors

Colors sum of rows with NA value

## [1] 0

Join parts_with_categories_and_elements with colors

part_num part_cat_id part_material name_categories element_id color_id name rgb is_trans
003381 58 Plastic Stickers NA NA NA NA NA
003383 58 Plastic Stickers NA NA NA NA NA
003402 58 Plastic Stickers NA NA NA NA NA
003429 58 Plastic Stickers NA NA NA NA NA
003432 58 Plastic Stickers NA NA NA NA NA
003434 58 Plastic Stickers NA NA NA NA NA

## # A tibble: 181 × 3
## # Groups:   name [181]
##    name                 rgb    count_color
##    <chr>                <chr>        <int>
##  1 Aqua                 B3D7D1          63
##  2 Black                05131D        7803
##  3 Blue                 0055BF        2869
##  4 Bright Green         4B9F4A         704
##  5 Bright Light Blue    9FC3E9         514
##  6 Bright Light Orange  F8BB3D        1037
##  7 Bright Light Yellow  FFF03A         528
##  8 Bright Pink          E4ADC8         602
##  9 Brown                583927         818
## 10 Chrome Antique Brass 645A4C           1
## # ℹ 171 more rows

Most popular colors

## # A tibble: 22 × 4
## # Groups:   name [22]
##    name                rgb    count_color rgb_fill
##    <chr>               <chr>        <int> <chr>   
##  1 Black               05131D        7803 #05131D 
##  2 Blue                0055BF        2869 #0055BF 
##  3 Bright Light Orange F8BB3D        1037 #F8BB3D 
##  4 Dark Blue           0A3463        1819 #0A3463 
##  5 Dark Bluish Gray    6C6E68        2939 #6C6E68 
##  6 Dark Gray           6D6E5C        1018 #6D6E5C 
##  7 Dark Red            720E0F        1673 #720E0F 
##  8 Dark Tan            958A73        1059 #958A73 
##  9 Green               237841        1995 #237841 
## 10 Light Bluish Gray   A0A5A9        3321 #A0A5A9 
## # ℹ 12 more rows

Analysis in inventories

Minifigures

Largest stock of minifigures
name sum_quantity img_tag
Woman, Blue Torso with White Arms, White Legs 101
Battle Droid, One Bent Arm, One Straight Arm 96
Skeleton, Standard Face, Ball Joint Arms (3626b Head) 59
Classic Spaceman, White with Airtanks (3842a Helmet) 51
Classic Spaceman, Red with Airtanks (3842a Helmet) 50
Battle Droid, Two Bent Arms 50
Pit Crew, Red Torso, Red Legs, Ferrari 34
Man, Blue Shirt, Blue Legs, Red Hard Hat 30
Martian 29
Classic Spaceman, Yellow with Airtanks (3842b Helmet) 27
Steve 27

Parts

Largest stock of parts
name sum_quantity img_tag
Plate 1 x 2 127955
Plate Round 1 x 1 with Solid Stud 119278
Brick 1 x 2 99847
Plate 1 x 1 92323
Brick 1 x 1 75605
Technic Pin with Friction Ridges Lengthwise and Center Slots 68443
Plate 1 x 4 57805
Brick 2 x 2 55379
Slope 30° 1 x 1 x 2/3 (Cheese Slope) 50685
Tile Round 1 x 1 50410

Sets

Largest stock of sets
name sum_quantity img_tag
Series 19 - Random Bag 110
Unikitty! Series 1 - Random Bag 60
Series 9 - Random Bag 60
Series 10 - Random Bag 60
Series 11 - Random Bag 60
The LEGO Movie Series 1 - Random Bag 60
The Simpsons Series 1 - Random Bag 60
Series 12 - Random Bag 60
Series 13 - Random Bag 60
The Simpsons Series 2 - Random Bag 60

Forecasting